The purpose of this report is to consolidate the basic data visualization skills with the Plotly package. Periodically, I will add new graphics using the new features. I really like the Plotly package, because it allows you to open a lot of hidden information in the data due to its interactivity.
Mass Shootings in the United States of America (1966-2017) The US has witnessed 398 mass shootings in last 50 years that resulted in 1996 deaths and 2488 injured. The latest and the worst mass shooting of October 2, 2017 killed 58 and injured 515 so far. The number of people injured in this attack is more than the number of people injured in all mass shootings of 2015 and 2016 combined. The average number of mass shootings per year is 7 for the last 50 years that would claim 39 lives and 48 injured per year.
Download the necessary packages for the manipulation and visualization of data
library(data.table)
library(readr)
library(plotly)
library(ggplot2)
library(maps)
library(tm)
library(wordcloud)
Load the data from source. Perform the first manipulations with the date. Extract the year and month from the date and combine the variables of gender
MS_dataset <- read_csv("./us-mass-shootings-last-50-years/Mass Shootings Dataset Ver 5.csv"
, col_types = cols(Date = col_date(format = "%m/%d/%Y")))
MS_dataset <- data.table(MS_dataset)
MS_dataset[,Month:=as.factor(month(Date))]
MS_dataset[,Year_n:=as.numeric(year(Date))]
MS_dataset[,Year:=as.factor(year(Date))]
MS_dataset[Gender=='M',Gender:="Male"]
MS_dataset[Gender=='M/F',Gender:="Male/Female"]
MS_dataset[is.na(Gender),Gender:="Unknown"]
MS_dataset[,Gender:=as.factor(Gender)]
plot_ly(data = MS_dataset
,type = 'scatter'
,mode = 'markers'
,hoverinfo = 'text'
,x = ~Month
,y = ~Year
,size = ~`Total victims`
,color = ~Gender
,colors = c('Red', 'Blue', 'Green', 'Black')
,alpha = 0.6
,text = ~paste("Location: ", Location
,'\n Date: ', Date
,'\n Total victims : ', `Total victims`
,'\n Fatalities : ', Fatalities
,'\n Injured : ', Injured)) %>%
layout(title = "Mass Shootings in US by years and month"
, xaxis = list(title = "Month")
, yaxis = list(title = "Years"))
# Font Settings
f1 <- list(
family = "Arial, sans-serif",
size = 14,
color = "grey"
)
f2 <- list(
family = "Old Standard TT, serif",
size = 12,
color = "black"
)
# Axis settings
ax <- list(
title = "Month",
titlefont = f1,
showticklabels = TRUE,
tickangle = 0,
tickfont = f2,
exponentformat = "E"
)
ay <- list(
title = "Year",
titlefont = f1,
showticklabels = TRUE,
tickangle = 0,
tickfont = f2,
exponentformat = "E"
)
b1 <- list(
text = "Total victims",
font = f1,
xref = "paper",
yref = "paper",
yanchor = "bottom",
xanchor = "center",
align = "center",
x = 0.5,
y = 1,
showarrow = FALSE
)
b2 <- list(
text = "Injured",
font = f1,
xref = "paper",
yref = "paper",
yanchor = "bottom",
xanchor = "center",
align = "center",
x = 0.5,
y = 1,
showarrow = FALSE
)
b3 <- list(
text = "Fatalities",
font = f1,
xref = "paper",
yref = "paper",
yanchor = "bottom",
xanchor = "center",
align = "center",
x = 0.5,
y = 1,
showarrow = FALSE
)
hm1 <-
#plot_ly(data = MS_dataset[`S#`!=1,]
plot_ly(data = MS_dataset
,type = 'heatmap'
,colors = colorRamp(c("yellow", "blue", "darkred"))
,x = ~Month
,y = ~Year
,z = ~`Total victims`)%>%
layout(showlegend = T
, xaxis = ax
, yaxis = ay
, annotations = b1)
hm2 <-
#plot_ly(data = MS_dataset[`S#`!=1,]
plot_ly(data = MS_dataset
,type = 'heatmap'
,colors = colorRamp(c("grey", "darkgrey", "black"))
,x = ~Month
,y = ~Year
,z = ~`Injured`)%>%
layout(showlegend = T
, xaxis = ax
, annotations = b2
, yaxis = list(
title = "",
zeroline = FALSE,
showline = FALSE,
showticklabels = FALSE,
showgrid = FALSE
))
hm3 <-
#plot_ly(data = MS_dataset[`S#`!=1,]
plot_ly(data = MS_dataset
,type = 'heatmap'
,colors = colorRamp(c("orange", "darkred", "black"))
,x = ~Month
,y = ~Year
,z = ~Fatalities)%>%
layout(showlegend = T
, xaxis = ax
, annotations = b3
, yaxis = list(
title = "",
zeroline = FALSE,
showline = FALSE,
showticklabels = FALSE,
showgrid = FALSE
))
subplot(hm1, hm2, hm3)
plot_ly(data = MS_dataset
,type = 'bar'
,mode = 'markers'
,hoverinfo = 'text'
,x = ~Year
,y = ~ `Total victims`
,color = 'Red'
,alpha = 0.9
,text = ~paste(
'Fatalities : ', Fatalities
,'\n Injured : ', Injured
)) %>%
layout(title = "Number of Total victims by years"
, xaxis = list(title = "")
, yaxis = list(title = "Number of victims"))
plot_ly(data = MS_dataset
,type = 'histogram'
,mode = 'markers'
,x = ~Year
,alpha = 0.9) %>%
layout(title = "Number of incidents by years"
, xaxis = list(title = "")
, yaxis = list(title = "Number of incidents"))
plot_ly(data = MS_dataset
,type = 'histogram'
,mode = 'markers'
,x = ~Month
,alpha = 0.9) %>%
layout(title = "Number of incidents by month"
, xaxis = list(title = "Month")
, yaxis = list(title = "Number of incidents"))
plot_ly(data = MS_dataset[!is.na(Summary),]
,type = 'scatter'
,mode = 'markers'
,hoverinfo = 'text'
,x = ~Fatalities
,y = ~Injured
,color = ~`Total victims`
# ,colors = "Reds"
,colors = colorRamp(c("darkgreen", "yellow", "darkred"))
,alpha = 0.6
,text = ~paste("Title: ", Title
,'\n Date: ', Date
,'\n Total victims : ', `Total victims`
,'\n Fatalities : ', Fatalities
,'\n Injured : ', Injured)) %>%
layout(title = "Fatalities & Injured (without \"Las Vegas Strip mass shooting\")"
, xaxis = list(title = "Fatalities")
, yaxis = list(title = "Injured")
, plot_bgcolor='LightGrey')
MS_dataset[`Mental Health Issues`=="unknown",`Mental Health Issues`:="Unknown"]
# set collors for first pie chart
colors_pie1 <- c('rgb(211,94,96)', 'rgb(128,133,133)', 'rgb(144,103,167)', 'rgb(171,104,87)', 'rgb(114,147,203)')
plot_ly(data = MS_dataset[,.(`Total victims`,`Mental Health Issues`)]
,type = 'pie'
,labels = ~`Mental Health Issues`
,values = ~`Total victims`
,textposition = 'inside'
,insidetextfont = list(color = '#FFFFFF')
,marker = list(colors = colors_pie1,
line = list(color = '#FFFFFF', width = 1)))%>%
layout(title = "Mental Health Issues",
showlegend = T)
MS_dataset$State <- sapply(MS_dataset$Location, function(x){
temp <- strsplit(x, split = ",")
sapply(temp, function(y){y[2]
})
})
plot_ly(data = MS_dataset[!is.na(State),.('Number of incidents'= uniqueN(`S#`)),by=State]
,type = 'pie'
,labels = ~State
,values = ~`Number of incidents`
,textposition = 'inside'
,insidetextfont = list(color = '#FFFFFF')
,marker = list(colors = colors_pie1,
line = list(color = '#FFFFFF', width = 1)))%>%
layout(title = "Number of incidents by States",
showlegend = T)
# Clearing and merging data in the Race field
MS_dataset[Race=="unclear",Race:="Unknown"]
MS_dataset[is.na(Race),Race:="White"]
MS_dataset[Race=="Black American or African American"
| Race=="black"
| Race=="Black American or African American/Unknown"
,Race:="Black"]
MS_dataset[Race=="White American or European American"
| Race=="White American or European American/Some other Race"
| Race=="white"
,Race:="White"]
MS_dataset[Race=="Asian American"
| Race=="Asian American/Some other race"
,Race:="Asian"]
MS_dataset[Race=="Unknown",Race:="Other"]
MS_dataset[Race=="Two or more races",Race:="Other"]
MS_dataset[Race=="Some other race",Race:="Other"]
MS_dataset[Race=="Native American or Alaska Native",Race:="Native American"]
plot_ly(data = MS_dataset[Title!='Las Vegas Strip mass shooting',]
,type = 'box'
,mode = 'markers'
,x = ~`Total victims`
,color =~Race
,alpha = 0.9) %>%
layout(title = "Total victims by Race (without \"Las Vegas Strip mass shooting\")"
, showlegend = T
, xaxis = list(title = "Number of victims")
, yaxis = list(title = ""))
plot_ly(data = MS_dataset[,.('Total victims'= sum(`Total victims`)),by=.(Race,Year)]
,type = 'bar'
,mode = 'markers'
,x = ~Year
,y = ~`Total victims`
,color =~Race
,alpha = 0.9) %>%
layout(title = "Total victims by Race"
, showlegend = T
, barmode = 'stack'
, position = 1
, xaxis = list(title = "")
, yaxis = list(title = "")
, legend = list(x = 0, y = 1)
, hovermode = 'compare')
plot_geo()g <- list(
scope = 'usa'
, projection = list(type = 'albers usa')
, showland = TRUE
, landcolor = 'grey'
, subunitwidth = 1
, countrywidth = 1
# , subunitcolor = toRGB("white")
# , countrycolor = toRGB("white")
)
plot_geo(MS_dataset
#, locationmode = 'USA-states'
, sizes = c(10, 300)) %>%
add_markers(
x = ~Longitude
, y = ~Latitude
, size = ~`Total victims`
, color = ~Fatalities
, colors = colorRamp(c("yellow", "red", "black"))
, hoverinfo = "text"
, text = ~paste(MS_dataset$Title
, '\n Fatalities: ', MS_dataset$Fatalities
, '\n Injured: ', MS_dataset$Injured)
) %>%
layout(title = 'Geography of Mass Shooting in US', geo = g)
ggplot2() + maps() + ggplotly()#load us map data
all_states <- map_data("state")
#plot all states with ggplot
p <- ggplot()
p <- p + geom_polygon(data=all_states, aes(x=long, y=lat, group = group),colour="black", fill="white")
p <-
p + geom_point(data=MS_dataset[Longitude >=-140,]
, aes(x=Longitude, y=Latitude
,size = `Total victims`
,color = Fatalities)
,alpha = 0.6) +
scale_color_gradient(low = "red", high = "black") +
ggtitle("Total victims & Fatalities on US map")
ggplotly(
p
)
MS_dataset$century <-
sapply(MS_dataset$Year_n, function(x){
ifelse(x>=2010,"7) 10th",
ifelse(x>=2000,"6) 00th",
ifelse(x>=1990,"4) 90th",
ifelse(x>=1980,"3) 80th",
ifelse(x>=1970,"2) 70th","1) 60th")))))
})
plot_ly(data = MS_dataset[Title!='Las Vegas Strip mass shooting',]
,type = 'scatter'
,mode = 'markers'
,hoverinfo = 'text'
,x = ~Fatalities
,y = ~Injured
,frame = ~century
,size = ~`Total victims`
,color = ~'DarkRed'
,colors = c('DarkRed')
,alpha = 0.9
,text = ~paste("Location: ", Location
,'\n Date: ', Date
,'\n Total victims : ', `Total victims`
,'\n Injured : ', Injured
,'\n Fatalities : ', Fatalities)) %>%
layout(title = "Animated History of incidents by centuries"
, xaxis = list(title = "Fatalities")
, yaxis = list(title = "Injured")
, showlegend = F
) %>%
animation_opts(
1500, redraw = FALSE
)
accumulate_by <- function(dat, var) {
var <- lazyeval::f_eval(var, dat)
lvls <- plotly:::getLevels(var)
dats <- lapply(seq_along(lvls), function(x) {
cbind(dat[var %in% lvls[seq(1, x)], ], frame = lvls[[x]])
})
dplyr::bind_rows(dats)
}
MS_dataset_V <- MS_dataset[`S#`!=1,.(Date,Year_n,`Total victims`)]
MS_dataset_V$Type <- 'Total victims'
MS_dataset_I <- MS_dataset[`S#`!=1,.(Date,Year_n, Injured)]
MS_dataset_I$Type <- 'Injured'
MS_dataset_F <- MS_dataset[`S#`!=1,.(Date,Year_n, Fatalities)]
MS_dataset_F$Type <- 'Fatalities'
MS_dataset_VIF <- rbindlist(l = list(MS_dataset_V,MS_dataset_I,MS_dataset_F))
# MS_dataset_RY$Year <- year(MS_dataset$Date)
MS_dataset_VIF <- MS_dataset_VIF[,.('Total victims'= sum(`Total victims`)),by=.(Year_n,Type)]
MS_dataset_VIF <- MS_dataset_VIF %>% accumulate_by(~Year_n)
plot_ly(data = MS_dataset_VIF
,type = 'scatter'
,mode = 'lines'
,line = list(simplyfy = F)
,x = ~Year_n
,y = ~`Total victims`
,color =~Type
,colors = c('Red','Orange','Black')
,frame = ~frame
,alpha = 0.9) %>%
layout(title = "Animated History of incidents"
, showlegend = T
, xaxis = list(title = "")
, yaxis = list(title = "")
,legend = list(x = 0.1, y = 0.9)
,hovermode = 'compare')%>%
animation_opts(
frame = 200,
transition = 0,
redraw = FALSE
) %>%
animation_slider(
hide = F
) %>%
animation_button(
x = 1, xanchor = "right", y = 0, yanchor = "bottom"
)
a1 <-
ggplot(data = MS_dataset[!is.na(Age)&Age!=0&Age<=70,], aes(x = Race, y = Age)) +
geom_boxplot(aes(col = Race)) +
ggtitle("Age of the shooter & Race") +
labs(x = "Race", y = "Age") +
theme(axis.text.x = element_text(angle = 0
, size = 9
, color = 'black'
, hjust = 1),
legend.position="none") +
geom_hline(aes(yintercept = median(Age))
, colour = 'red'
, linetype = 2
, alpha = 0.5) +
geom_hline(aes(yintercept = mean(Age))
, colour = 'blue'
, linetype = 2
, alpha = 0.5)
ggplotly(
a1
)